{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 基础index可以合成使用\n",
    "## 数据准备"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "\n",
    "import numpy as np\n",
    "\n",
    "d = 512          # vector dimensionality\n",
    "n_data = 2000    # number of database vectors\n",
    "n_query = 10     # number of query vectors\n",
    "mu = 3           # mean of the normal distribution the samples are drawn from\n",
    "sigma = 0.1      # standard deviation of that distribution\n",
    "\n",
    "# Database vectors: one seeded draw of shape (n_data, d), cast to float32.\n",
    "# A single call consumes the seeded random stream in the same order as\n",
    "# drawing the rows one at a time, so the values are unchanged.\n",
    "np.random.seed(0)\n",
    "data = np.random.normal(mu, sigma, (n_data, d)).astype('float32')\n",
    "\n",
    "# Query vectors: same distribution, different seed.\n",
    "np.random.seed(12)\n",
    "query = np.random.normal(mu, sigma, (n_query, d)).astype('float32')\n",
    "\n",
    "# Import faiss from a local build. The directory can be overridden with the\n",
    "# FAISS_PYTHON_PATH environment variable; the default is the original location.\n",
    "sys.path.append(os.environ.get('FAISS_PYTHON_PATH', '/home/maliqi/faiss/python/'))\n",
    "import faiss"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 乘积量化作为cell-probe方法的粗量化器\n",
    "乘积量化也可以作为粗量化器。其中有两个参数：聚类中心数c和维度划分m。这样每个划分都有c个倒排表，一共有$c^m$个倒排表。实际使用中，一般直接让m=2。  \n",
    "MultiIndexQuantizer也经常与IndexFlat对比，以便选取合适的参数。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[1269 1028 1895  120 1267  178  178 1061 1972 1029]\n",
      " [1398  289   70 1023 1177  940  969  969 1568  700]\n",
      " [ 345  389 1904 1992 1992 1612 1623 1632  539  366]\n",
      " [ 112 1412 1624  879  394 1506 1398 1326   91  440]\n",
      " [  94 1459 1517 1723 1255   66  238  238 1755  472]\n",
      " [ 574  574 1523   91  456  296  444 1384  103 1230]\n",
      " [1391  876   91 1914   78  969  732  999  277 1158]\n",
      " [1662 1654  722 1070  121 1496  631  631 1442 1738]\n",
      " [ 154   99   31 1237  289  661  426 1008 1727  744]\n",
      " [ 375 1826  610  750  750 1430  459 1339  471  441]]\n"
     ]
    }
   ],
   "source": [
    "nbits_mi = 5   # bits per sub-quantizer, i.e. c = 2 ** nbits_mi centroids per sub-space\n",
    "M_mi = 2       # m: number of sub-spaces\n",
    "topk = 10      # number of neighbours requested per query\n",
    "\n",
    "# Total number of inverted lists: c ** m == 2 ** (M_mi * nbits_mi).\n",
    "ncentroids_mi = 1 << (M_mi * nbits_mi)\n",
    "\n",
    "# Product-quantization based coarse quantizer; no data needs to be added to it.\n",
    "coarse_quantizer_mi = faiss.MultiIndexQuantizer(d, M_mi, nbits_mi)\n",
    "\n",
    "index = faiss.IndexIVFFlat(coarse_quantizer_mi, d, ncentroids_mi)\n",
    "# Flag required when the coarse quantizer is a MultiIndexQuantizer\n",
    "# (NOTE(review): exact semantics are defined by faiss -- see its IndexIVF docs).\n",
    "index.quantizer_trains_alone = True\n",
    "index.train(data)\n",
    "index.add(data)\n",
    "\n",
    "index.nprobe = 50\n",
    "dis, ind = index.search(query, topk)\n",
    "print(ind)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Pre-filtering PQ codes with polysemous codes\n",
    "TODO"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
